在之前介紹爬蟲的測試程式時,我們是自己將 HTML 另存成檔案,再利用 Mockery 來回傳 HTML。因為之前的範例只抓了一頁,所以另存新檔只需操作一次;但這次我們需要抓 10 個不同的頁面,若每一頁都要手動存檔,是不是很麻煩?
所幸有神人開發了 PHPVCR 這個套件,只要我們執行了任何抓網頁的程式,PHPVCR 就會自動將 HTML 儲存下來,這樣在撰寫爬蟲時就會非常方便
所以我們就接著來安裝 PHPVCR 吧
composer require --dev php-vcr/php-vcr
安裝完畢後,我們就照說明來修改測試程式
原本的測試程式
<?php
namespace Recca0120\Ithome30\Tests;
use Mockery;
use GuzzleHttp\Client;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Psr7\Response;
use PHPUnit\Framework\TestCase;
use Recca0120\Ithome30\PttCrawler;
use Psr\Http\Client\ClientInterface;
class PttCrawlerTest extends TestCase
{
    /**
     * Feeds the crawler a saved copy of the page through a spied ClientInterface
     * and checks both the first parsed record and the request that was sent.
     */
    public function test_fetch_board_page()
    {
        // Canned response built from the HTML fixture stored next to this test.
        $fixtureHtml = file_get_contents(__DIR__ . '/fixtures/ptt_home.html');
        $cannedResponse = new Response(200, [], $fixtureHtml);

        /** @var Mockery\Mock|ClientInterface $client */
        $client = Mockery::spy(ClientInterface::class);
        $client->allows('sendRequest')->andReturn($cannedResponse);

        $boards = (new PttCrawler($client))->all();

        $expectedFirstBoard = [
            'name' => 'Gossiping',
            'nuser' => '12185',
            'class' => '綜合',
            'title' => '[八卦]不停重複今日公祭明日忘記',
        ];
        self::assertEquals($expectedFirstBoard, $boards[0]);

        // Exactly one request must have gone out, and it must target the hot-boards URL.
        $targetsHotBoards = Mockery::on(function (Request $sent) {
            $uri = (string) $sent->getUri();

            return $uri === 'https://www.ptt.cc/bbs/hotboards.html';
        });
        $client->shouldHaveReceived('sendRequest')->once()->with($targetsHotBoards);
    }

    /**
     * Same first-record check, but wired to a real Guzzle client.
     * The test marks itself as skipped before doing anything else.
     */
    public function test_guzzle_client()
    {
        $this->markTestSkipped();

        $boards = (new PttCrawler(new Client()))->all();

        $expectedFirstBoard = [
            'name' => 'Gossiping',
            'nuser' => '12185',
            'class' => '綜合',
            'title' => '[八卦]不停重複今日公祭明日忘記',
        ];
        self::assertEquals($expectedFirstBoard, $boards[0]);
    }
}
修改後的測試程式
<?php
namespace Recca0120\Ithome30\Tests;
use Mockery;
use GuzzleHttp\Client;
use GuzzleHttp\Psr7\Request;
use PHPUnit\Framework\TestCase;
use Recca0120\Ithome30\PttCrawler;
use Psr\Http\Client\ClientInterface;
class PttCrawlerTest extends TestCase
{
    /**
     * Runs the crawler with a real Guzzle client while PHP-VCR is turned on
     * with the `ptt_home.yaml` cassette inserted, then checks the first parsed
     * record and the request that was sent.
     */
    public function test_fetch_board_page()
    {
        // These two lines enable PHP-VCR and select the cassette for this test.
        \VCR\VCR::turnOn();
        \VCR\VCR::insertCassette('ptt_home.yaml');

        try {
            // Inject a real Client (wrapped in a spy) instead of a stubbed one.
            /** @var Mockery\Mock|ClientInterface $httpClient */
            $httpClient = Mockery::spy(new Client());
            $crawler = new PttCrawler($httpClient);

            $records = $crawler->all();

            self::assertEquals([
                'name' => 'Gossiping',
                "nuser" => '12185',
                'class' => '綜合',
                'title' => '[八卦]不停重複今日公祭明日忘記',
            ], $records[0]);

            $httpClient->shouldHaveReceived('sendRequest')->once()->with(Mockery::on(function (Request $request) {
                return (((string)$request->getUri()) === 'https://www.ptt.cc/bbs/hotboards.html');
            }));
        } finally {
            // Leave PHP-VCR even when an assertion above fails; a failed assertion
            // throws, and without `finally` VCR would stay on and affect other tests.
            \VCR\VCR::eject();
            \VCR\VCR::turnOff();
        }
    }
}
因為 PHPVCR 是直接 hook PHP 的 curl 與 stream_wrapper,只要啟用 PHPVCR,我們就可以直接注入真實的 Client 來執行測試(順便可以把 test_guzzle_client 這個測試案例移除)。修改完畢後,我們執行一次測試,看看會發生什麼情況
fixtures 的資料夾內會產生一個 ptt_home.yaml 的檔案,而且測試會亮紅燈,我們先來觀察看看 ptt_home.yaml 會得到什麼樣的內容
-
request:
method: GET
url: 'https://www.ptt.cc/bbs/hotboards.html'
headers:
Host: www.ptt.cc
Accept-Encoding: ''
User-Agent: GuzzleHttp/7
Accept: ''
response:
status:
code: 200
message: OK
headers:
Date: 'Fri, 29 Sep 2023 15:09:05 GMT'
Content-Type: 'text/html; charset=utf-8'
Transfer-Encoding: chunked
Connection: keep-alive
strict-transport-security: max-age=0
CF-Cache-Status: DYNAMIC
Report-To: '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=%2F43oH3EemFbyAoZru0qjiU9c37mqcLqBc%2F%2FnlsBB78IyMXBGzV7A6BLn71NNVWe3R%2B6JHNtKDWl%2FBBN25oZtU0kF3pXWZL7KZjCzkQqMsd7t9goQqIitwjmICs45"}],"group":"cf-nel","max_age":604800}'
NEL: '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}'
X-Content-Type-Options: nosniff
Server: cloudflare
CF-RAY: 80e5252a0b59e07a-NRT
alt-svc: 'h3=":443"; ma=86400'
body: "[html code]"
curl_info:
url: 'https://www.ptt.cc/bbs/hotboards.html'
content_type: 'text/html; charset=utf-8'
http_code: 200
header_size: 644
request_size: 80
filetime: -1
ssl_verify_result: 0
redirect_count: 0
total_time: 0.459413
namelookup_time: 0.038673
connect_time: 0.130057
pretransfer_time: 0.212264
size_upload: 0.0
size_download: 54174.0
speed_download: 117920.0
speed_upload: 0.0
download_content_length: -1.0
upload_content_length: 0.0
starttransfer_time: 0.376152
redirect_time: 0.0
redirect_url: ''
primary_ip: 172.64.167.22
certinfo: { }
primary_port: 443
local_ip: 192.168.15.243
local_port: 55102
http_version: 2
protocol: 2
ssl_verifyresult: 0
scheme: HTTPS
appconnect_time_us: 212234
connect_time_us: 130057
namelookup_time_us: 38673
pretransfer_time_us: 212264
redirect_time_us: 0
starttransfer_time_us: 376152
total_time_us: 459413
index: 0
這時我們會看到 ptt_home.yaml 完整記錄了 response 的 header 及 body,遠比我們自己手動記錄還要完整,所以很推薦使用!最後我們只需再將 expect 的內容修改一下,讓測試成為綠燈即可。
或許有人會有疑問 PHPVCR 這麼好用為什麼不一開始就介紹?下一篇我們先來聊聊為什麼不一開始就拿出來的原因